# Purpose: assign each query term a probability based on our 2D probability model.
# key: queryTerm
# value: a tuple with the following format: (freqInQL, length)
# Extract one numeric column per query term from the input table and write
# "<queryTerm> <value>" lines to the output file, then report the column sum.
#
# Input format as consumed below: the first line is skipped; every other line
# is split on single spaces, field 0 is the query term and field 3 is parsed
# as a float.
# NOTE(review): field 3 is presumably the 2D-model predicted probability
# (going by the file names) -- confirm against the input file's column layout.
inputFileName = "/data3/obukai/the_new_trip_of_feature_generation/gov2ClearYourMindAndDoItAgain/probabilityDistributionEstimationByProf/fourSetOfQueriesByProf20130410/smallBucketsEquallizationMethodByProf20130411/queryTermsFrom100KWithTheirTrueProbablityAndOurOwnModelPredictedProbablity1D_2D_GoodTuringProbabilityAdded20130429"
outputFileName = "/data3/obukai/the_new_trip_of_feature_generation/gov2ClearYourMindAndDoItAgain/queryTermProbability_predicted_by_our_2D"

totalValue = 0
# The `with` blocks guarantee both files are closed (and the output flushed)
# even if a malformed line raises part-way through. The input is opened first
# so that a missing input file does not truncate an existing output file.
with open(inputFileName, "r") as inputFileHandler:
    with open(outputFileName, "w") as outputFileHandler:
        # Discard the first line of the input.
        inputFileHandler.readline()

        # Stream line by line instead of materialising the file via readlines().
        for line in inputFileHandler:
            strippedLine = line.strip()
            if not strippedLine:
                # Blank lines (e.g. a trailing empty line) carry no record.
                continue
            lineElements = strippedLine.split(" ")
            queryTerm = lineElements[0]
            currentValue = float(lineElements[3])
            outputFileHandler.write(queryTerm + " " + str(currentValue) + "\n")
            totalValue += currentValue

# Single parenthesised expression: prints the same text under the Python 2
# print statement and the Python 3 print function.
print("totalValue: %s" % totalValue)